library(readxl)
library(readr)
library(dplyr)
library(ggplot2)
library(stringr)
library(coefplot)
library(tidyr)
library(xtable)
library(stargazer)
library(ggpubr)
library(estimatr)

#------------------------------------------------------------------------------------------
## Method 3. NTUSD Sentiment Scores Using CMCOR Data
#------------------------------------------------------------------------------------------

# Load files

# Read one campaign workbook, keeping only spreadsheet columns B through V.
read_campaign <- function(workbook) {
  read_excel(workbook, range = cell_cols("B:V"))
}

# Mobilization campaigns
India_1962 <- read_campaign("India_1962.xlsx")
Soviet_1969 <- read_campaign("Soviet_1969.xlsx")
Vietnam_1974 <- read_campaign("Vietnam_1974.xlsx")
Vietnam_1979 <- read_campaign("Vietnam_1979.xlsx")

# Pacification campaigns
Japan_1990 <- read_campaign("Japan_1990.xlsx")
Japan_1996 <- read_campaign("Japan_1996.xlsx")
Japan_2005 <- read_campaign("Japan_2005.xlsx")
Japan_2010 <- read_campaign("Japan_2010.xlsx")
Japan_2012 <- read_campaign("Japan_2012.xlsx")
Philippines_2016 <- read_campaign("Philippines_2016.xlsx")
India_2017 <- read_campaign("India_2017.xlsx")

# Stack the individual campaigns into one table per campaign type.
# The stacking order matters: the per-row campaign labels built later in the
# script are generated in this same order.
Mob <- rbind(India_1962, Soviet_1969, Vietnam_1974, Vietnam_1979)
Pac <- rbind(
  Japan_1990, Japan_1996, Japan_2005, Japan_2010, Japan_2012,
  Philippines_2016, India_2017
)

# NTUSD sentiment dictionaries (simplified-character files), read without a
# header row so that the first line is kept as a dictionary entry.
Positive <- read_csv("NTUSD_positive_simplified.csv", col_names = FALSE)
Negative <- read_csv("NTUSD_negative_simplified.csv", col_names = FALSE)

# Dictionary entries as plain vectors, taken from the first column of each file.
# Extracting the column directly replaces an element-by-element loop that
# re-copied the growing result on every iteration, misbehaved on an empty
# dictionary (1:0 iterates over 1 and 0), and produced a list instead of a
# vector. match() treats a list of single strings and a character vector the
# same way, so lookups against these objects are unaffected.
Positive_CN <- Positive[[1]]
Negative_CN <- Negative[[1]]

# Calculate sentiment scores.
#
# count_lexicon_hits() returns, for every row of `tokens`, the number of cells
# whose value exactly matches an entry of `lexicon`.
#   tokens  : data frame / tibble with one observation per row.
#   lexicon : dictionary entries (character vector, or list of single strings).
#   returns : integer vector with one count per row (length 0 for zero rows).
#
# seq_len(nrow(.)) replaces `1:dim(.)`, which handed a length-2 vector to `:`
# (only its first element is used, with a warning) and iterated over 1 and 0
# when the table had no rows.
count_lexicon_hits <- function(tokens, lexicon) {
  vapply(
    seq_len(nrow(tokens)),
    function(i) sum(!is.na(match(tokens[i, ], lexicon))),
    integer(1)
  )
}

Positive_Score_Mob <- count_lexicon_hits(Mob, Positive_CN)
Positive_Score_Pac <- count_lexicon_hits(Pac, Positive_CN)

Negative_Score_Mob <- count_lexicon_hits(Mob, Negative_CN)
Negative_Score_Pac <- count_lexicon_hits(Pac, Negative_CN)

# Number of non-missing cells in each row, used as the denominator of the
# aggregated score. Computed for all rows at once: this avoids the previous
# `1:dim(.)` loop (length-2 argument to `:`, and a spurious pass over rows 1
# and 0 when the table is empty) and no longer leaves a scratch variable behind.
# as.integer() keeps the counts integer, as length() produced before.
Word_Count_Mob <- as.integer(rowSums(!is.na(Mob)))
Word_Count_Pac <- as.integer(rowSums(!is.na(Pac)))

# Per-row NTUSD scores for the mobilization campaigns.
# Negative hits are stored with a minus sign, so Positive + Negative is the net
# hit count; Aggregated divides that net count by the row's non-missing cells.
NTU_Mob <- data.frame(
  Positive = Positive_Score_Mob,
  Negative = Negative_Score_Mob * -1,
  Count = Word_Count_Mob
)
NTU_Mob$Aggregated <- (NTU_Mob$Positive + NTU_Mob$Negative) / NTU_Mob$Count

# One campaign label per row, repeated in the order the campaigns were stacked.
India1962_Vec <- rep("India_1962", times = nrow(India_1962))
Soviet1969_Vec <- rep("Soviet_1969", times = nrow(Soviet_1969))
Vietnam1974_Vec <- rep("Vietnam_1974", times = nrow(Vietnam_1974))
Vietnam1979_Vec <- rep("Vietnam_1979", times = nrow(Vietnam_1979))
Mob_Campaign <- c(
  India1962_Vec, Soviet1969_Vec, Vietnam1974_Vec, Vietnam1979_Vec
)

NTU_Mob$Campaigns <- Mob_Campaign
NTU_Mob$Type <- "Mobilization Campaign"

# Per-row NTUSD scores for the pacification campaigns, built the same way as
# the mobilization table: signed negative hits, and a net score divided by the
# row's non-missing cell count.
NTU_Pac <- data.frame(
  Positive = Positive_Score_Pac,
  Negative = Negative_Score_Pac * -1,
  Count = Word_Count_Pac
)
NTU_Pac$Aggregated <- (NTU_Pac$Positive + NTU_Pac$Negative) / NTU_Pac$Count

# One campaign label per row, repeated in the order the campaigns were stacked.
Japan1990_Vec <- rep("Japan_1990", times = nrow(Japan_1990))
Japan1996_Vec <- rep("Japan_1996", times = nrow(Japan_1996))
Japan2005_Vec <- rep("Japan_2005", times = nrow(Japan_2005))
Japan2010_Vec <- rep("Japan_2010", times = nrow(Japan_2010))
Japan2012_Vec <- rep("Japan_2012", times = nrow(Japan_2012))
Philippines2016_Vec <- rep("Philippines_2016", times = nrow(Philippines_2016))
India2017_Vec <- rep("India_2017", times = nrow(India_2017))
Pac_Campaign <- c(
  Japan1990_Vec, Japan1996_Vec, Japan2005_Vec, Japan2010_Vec, Japan2012_Vec,
  Philippines2016_Vec, India2017_Vec
)

NTU_Pac$Campaigns <- Pac_Campaign
NTU_Pac$Type <- "Pacification Campaign"

# Both campaign types in one table; Type tells the two groups apart.
ALL <- rbind(NTU_Mob, NTU_Pac)

# ==================== Table 7.7 =====================================================
# Mean positive, negative and aggregated score for each campaign type.
NTU_Mob_mean <- summarise(
  NTU_Mob,
  mob_positive_average = mean(Positive),
  mob_negative_average = mean(Negative),
  mob_aggregated_average = mean(Aggregated)
)

NTU_Pac_mean <- summarise(
  NTU_Pac,
  pac_positive_average = mean(Positive),
  pac_negative_average = mean(Negative),
  pac_aggregated_average = mean(Aggregated)
)

# Lay the six means out as a 3 x 2 matrix (filled column by column):
# score types in rows, campaign types in columns.
mean_data <- matrix(
  c(
    NTU_Mob_mean$mob_positive_average,
    NTU_Mob_mean$mob_negative_average,
    NTU_Mob_mean$mob_aggregated_average,
    NTU_Pac_mean$pac_positive_average,
    NTU_Pac_mean$pac_negative_average,
    NTU_Pac_mean$pac_aggregated_average
  ),
  ncol = 2,
  dimnames = list(
    c("Positive", "Negative", "Aggregated"),
    c("Mobilization Campaign", "Pacification Campaign")
  )
)
# xtable(mean_data) # Same with stargazer
# Prints LaTeX source; paste it into a LaTeX document to render the table.
stargazer(mean_data, type = "latex", title = "Average of sentiment scores")

# Two-sample t-tests: mobilization vs. pacification rows, one test per score.
# t.test() is called with its defaults, i.e. two-sided and without assuming
# equal variances (Welch) unless var.equal is supplied.
t.test(NTU_Mob$Positive, NTU_Pac$Positive)
t.test(NTU_Mob$Negative, NTU_Pac$Negative)
t.test(NTU_Mob$Aggregated, NTU_Pac$Aggregated)

# One-way tests of equal means across campaign Type on the combined table.
# oneway.test() also defaults to not assuming equal variances.
# NOTE(review): Type has only two levels here, so these are expected to mirror
# the t-tests above -- confirm whether both sets of results are needed.
oneway.test(Positive ~ Type, data = ALL) 
oneway.test(Negative ~ Type, data = ALL) 
oneway.test(Aggregated ~ Type, data = ALL) 


# ==================== Figure 7.10 =====================================================
innerCI.n <- 1.64 # set so exclusion of CI implies two sided significance of 0.1
outerCI.n <- 1.96 # set so exclusion of CI implies two sided significance of 0.05

# Coefficient plot for a model of the form `score ~ Type - 1`.
# Without an intercept the two coefficients are the per-type group means.
#   model   : fitted lm object whose coefficients are the two Type levels.
#   returns : ggplot object with relabelled, explicitly ordered coefficients.
# Uses the CI multipliers innerCI.n / outerCI.n defined above.
plot_type_coefs <- function(model) {
  type_labels <- c(
    "TypePacification Campaign" = "Pacification Campaigns",
    "TypeMobilization Campaign" = "Mobilization Campaigns"
  )
  coef_plot <- coefplot(
    model,
    newNames = type_labels,
    intercept = FALSE,
    lwdOuter = 0.5,
    innerCI = innerCI.n,
    outerCI = outerCI.n,
    color = "black",
    title = ""
  ) + theme_bw()
  # Fix the display order of the two coefficients explicitly
  # (presumably so that Mobilization is drawn above Pacification).
  coef_plot$data$Coefficient <- factor(
    coef_plot$data$Coefficient,
    levels = rev(c("Mobilization Campaigns", "Pacification Campaigns"))
  )
  coef_plot
}

positive_coef <- plot_type_coefs(lm(Positive ~ Type - 1, data = ALL))
positive_coef

negative_coef <- plot_type_coefs(lm(Negative ~ Type - 1, data = ALL))
negative_coef

aggregated_coef <- plot_type_coefs(lm(Aggregated ~ Type - 1, data = ALL))
aggregated_coef

# Stack the three panels vertically. The first panel label previously read
# "Postive Scores"; the spelling is corrected here.
figure_coef <- ggarrange(
  positive_coef, negative_coef, aggregated_coef,
  labels = c("Positive Scores", "Negative Scores", "Aggregated Scores"),
  ncol = 1, nrow = 3
)
figure_coef


# ==================== Figure 7.11 =====================================================
# Axis labels for the campaigns, keyed by the campaign identifiers in ALL.
labels_boxplot <- c(
  India_1962 = "India 1962",
  Soviet_1969 = "Soviet 1969",
  Vietnam_1974 = "Vietnam 1974",
  Vietnam_1979 = "Vietnam 1979",
  Japan_1990 = "Japan 1990",
  Japan_1996 = "Japan 1996",
  Japan_2005 = "Japan 2005",
  Japan_2010 = "Japan 2010",
  Japan_2012 = "Japan 2012",
  Philippines_2016 = "Philippines 2016",
  India_2017 = "India 2017"
)

# Plot from a copy of ALL in which Campaigns is an ordered factor. The levels
# are the label keys in reverse, which fixes the campaign order on the y axis.
campaign_levels <- rev(names(labels_boxplot))
boxplot_data <- ALL
boxplot_data$Campaigns <- factor(
  boxplot_data$Campaigns,
  levels = campaign_levels,
  ordered = TRUE
)

# Jittered raw scores behind one box per campaign, coloured by campaign type,
# with a dashed reference line at an aggregated score of zero.
boxplot <- ggplot(
  boxplot_data,
  aes(x = Aggregated, y = Campaigns, colour = Type)
) +
  geom_jitter(
    shape = 15,
    alpha = 0.15,
    color = "steelblue",
    position = position_jitter(0.21)
  ) +
  geom_boxplot() +
  geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") +
  ylab("Media Campaigns") +
  scale_color_manual(values = c("indianred", "seagreen")) +
  scale_y_discrete(labels = labels_boxplot)

boxplot
